results_dir <- "results/final"
figures_dir <- "figures/final"
sourcedata_dir <- "source_data/final"
dir.create(results_dir, recursive = TRUE)
dir.create(figures_dir, recursive = TRUE)
dir.create(sourcedata_dir, recursive = TRUE)

1 Package

library(SingleCellExperiment)
library(dplyr)
library(ggplot2)
library(ggthemes)
library(ggpubr)
library(moon) # Yingxin's personal package
library(pheatmap)
library(reshape2)
library(gridExtra)
library(RColorBrewer)
library(UpSetR)
library(scattermore)
library(scater)
library(scran)
library(ggridges)
library(rcartocolor)
library(Rtsne)
library(ggalluvial)
library(ggrepel)
library(BiocParallel)
library(BiocSingular)
library(BiocNeighbors)
library(openxlsx)
library(org.Hs.eg.db)
ggplot2::theme_set(theme_bw() + theme_yx() + 
                     theme(axis.text.y = element_text(color = "black"),
                           axis.text.x = element_text(color = "black")) )
rdbu <- colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(100)
reds <- colorRampPalette(c("white", brewer.pal(n = 7, name = "Reds")))(100)
blues <- colorRampPalette(c("white", brewer.pal(n = 7, name = "Blues")))(100)
mean_trim_low <- function(x, trim = 0) {
  mean(x[x >= quantile(x, trim)])
}
minMax <- function(x) {
  (x - min(x))/(max(x) - min(x))
}

library(fgsea)
runFGSEA <- function(stats, pathways, scoreType = "std") {
  fgseaRes <- fgsea(pathways = pathways,
                    stats    = sort(stats),
                    minSize = 5,
                    maxSize = 10000,
                    nproc = 1,
                    scoreType = scoreType)
  fgseaRes[order(fgseaRes$ES, decreasing = TRUE),]
}
library(org.Hs.eg.db)
library(clusterProfiler)
runGO <- function(gene_set, background, maxGSSize = 500) {
  eg <- bitr(gene_set,
             fromType = "SYMBOL", 
             toType = "ENTREZID", 
             OrgDb = "org.Hs.eg.db")
  
  geneList <- bitr(background,
                   fromType = "SYMBOL", 
                   toType = "ENTREZID", 
                   OrgDb = "org.Hs.eg.db")
  
  ego_res <- enrichGO(gene = eg$ENTREZID,
                      universe = geneList$ENTREZID,
                      OrgDb = org.Hs.eg.db,
                      ont = "BP",
                      pAdjustMethod = "BH",
                      minGSSize = 10,
                      maxGSSize = maxGSSize,
                      pvalueCutoff = 1,
                      qvalueCutoff = 1,
                      readable = TRUE)
  return(ego_res)
}
sce <- readRDS("source_data/final/sce_melanoma.rds")
colnames(sce) <- sce$Barcode
numbat_classify <- readRDS("results/numbat_tumour_classification.rds")
sce_melanoma <- readRDS("source_data/final//sce_melanoma_tumor.rds")


melanoma_markers <- readxl::read_xlsx("data/mmc4-2.xlsx", skip = 1)
melanoma_markers <- split(melanoma_markers$Gene, melanoma_markers$Signature)
Nazarian_markers <- read.csv("data/Nazarian_mapk_signature.csv", header = FALSE)
Nazarian_markers <- intersect(Nazarian_markers$V1, rownames(sce_melanoma))

Pratilas_markers <- c("ARID5A", "B4GALT6", "BRIX1", "BYSL", "CCND1", "CD3EAP",
                      "CHSY1",  "DDX21",    "DUSP4",    "DUSP6",    "EGR1", "ELOVL6",
                      "ETV1",   "ETV4", "ETV5", "FLJ10534", "FOS",  "FOSL1",
                      "GEMIN4", "GNL3", "GPR3", "GTPBP4",   "HMGA2",    "NOP16",
                      "IER3",   "CXCL8",    "LIF",  "SH2B3",    "MAFF", "MAP2K3",   "MYC",
                      "PHLDA2", "PLK3", "POLR1C",   "POLR3G",   "PPAN", "PPAT",
                      "PYCRL",  "RRS1", "SLC1A5",   "SLC4A7",   "SPRED2",   "SPRY2",
                      "SPRY4",  "TNC",  "TNFRSF12A",    "WDR3", "YRDC")
Pratilas_markers <- intersect(Pratilas_markers, rownames(sce_melanoma))

Nazarian_scores <- apply(logcounts(sce_melanoma)[Nazarian_markers, ], 2, mean_trim_low, trim = 0.1)
cc_genes <- lapply(Seurat::cc.genes.updated.2019, function(x) intersect(x, rownames(sce_melanoma)))
cc_s_scores <- apply(logcounts(sce_melanoma)[cc_genes$s.genes, ], 2, mean_trim_low, trim = 0.1)
cc_g2m_scores <- apply(logcounts(sce_melanoma)[cc_genes$g2m.genes, ], 2, mean_trim_low, trim = 0.1)
cc_scores <- cc_s_scores + cc_g2m_scores

gene_scores <- lapply(melanoma_markers, function(x) {
  apply(assay(sce_melanoma, "logcounts")[intersect(x, rownames(sce_melanoma)), ], 2,
        mean_trim_low, trim = 0.1)
})



melanoma_color_coarse <- RColorBrewer::brewer.pal(8, "Dark2")[c(4, 6, 5, 3, 8)]
names(melanoma_color_coarse) <- c(names(gene_scores)[c(1, 4, 2, 6)], "unassigned")

hallmarkList <- readRDS("data/hallmarkList.rds")
hallmarkList <- lapply(hallmarkList, function(x) intersect(x, rownames(sce_melanoma)))
hallmark_scores <- lapply(hallmarkList[c("HALLMARK_INFLAMMATORY_RESPONSE", "HALLMARK_APOPTOSIS")], function(x) {
  apply(logcounts(sce_melanoma)[intersect(x, rownames(sce_melanoma)), ], 2, mean_trim_low, trim = 0.1)
})



# GO:0097527 necroptotic
retrieved_0097527 <- AnnotationDbi::select(org.Hs.eg.db, keytype="GOALL", keys="GO:0097527", columns="SYMBOL")
markers_necroptotic <- unique(retrieved_0097527$SYMBOL)
necroptotic_scores <- apply(logcounts(sce_melanoma)[markers_necroptotic, ], 2, mean_trim_low, trim = 0.1)
hist(necroptotic_scores)

#GO: 2001238 extrinsic apoptotic

retrieved_2001238 <- AnnotationDbi::select(org.Hs.eg.db, keytype="GOALL", keys="GO:2001238", columns="SYMBOL")
markers_extrinsic_apoptotic <- intersect(rownames(sce_melanoma), unique(retrieved_2001238$SYMBOL))
extrinsic_apoptotic_scores <- apply(logcounts(sce_melanoma)[markers_extrinsic_apoptotic, ], 2, mean_trim_low, trim = 0.1)
hist(extrinsic_apoptotic_scores, breaks = 100)

cor(extrinsic_apoptotic_scores, cc_scores)
## [1] 0.3571884
cor(extrinsic_apoptotic_scores, hallmark_scores$HALLMARK_APOPTOSIS)
## [1] 0.7230966
plot(extrinsic_apoptotic_scores, cc_scores)

#GO: 2001238 intrinsic 2001244

retrieved_2001244 <- AnnotationDbi::select(org.Hs.eg.db, keytype="GOALL", keys="GO:2001244", columns="SYMBOL")
markers_intrinsic_apoptotic <- intersect(rownames(sce_melanoma), unique(retrieved_2001244$SYMBOL))
intrinsic_apoptotic_scores <- apply(logcounts(sce_melanoma)[markers_intrinsic_apoptotic, ], 2, mean_trim_low, trim = 0.1)
hist(intrinsic_apoptotic_scores, breaks = 100)

cor(intrinsic_apoptotic_scores, cc_scores)
## [1] 0.1790809
cor(intrinsic_apoptotic_scores, hallmark_scores$HALLMARK_APOPTOSIS)
## [1] 0.6521549
plot(intrinsic_apoptotic_scores, cc_scores)

plot(intrinsic_apoptotic_scores + extrinsic_apoptotic_scores, cc_scores)

#GO: 0010508

retrieved_0010508 <- AnnotationDbi::select(org.Hs.eg.db, keytype="GOALL", keys="GO:0010508", columns="SYMBOL")
markers_autophagy <- intersect(rownames(sce_melanoma), unique(retrieved_0010508$SYMBOL))
autophagy_scores <- apply(logcounts(sce_melanoma)[markers_autophagy, ], 2, mean_trim_low, trim = 0.1)
hist(autophagy_scores, breaks = 100)

cor(autophagy_scores, cc_scores)
## [1] 0.1550935
cor(autophagy_scores, hallmark_scores$HALLMARK_APOPTOSIS)
## [1] 0.7332516
plot(autophagy_scores, cc_scores)

celldeath_scores <- cbind(autophagy_scores, intrinsic_apoptotic_scores, extrinsic_apoptotic_scores, necroptotic_scores)
celldeath_scores <- rowMeans(apply(celldeath_scores, 2, function(x) (x - min(x))/(max(x) - min(x))))
cor(celldeath_scores, cc_scores)
## [1] 0.2473353
apoptosis <- celldeath_scores
proliferation <- cc_s_scores + cc_g2m_scores
proliferation <- minMax(proliferation)


axl_program <- readxl::read_xlsx("data/aad0501_table_s8.xlsx")
axl_program <- unlist(axl_program$`Tables S8. Genes in the AXL program from single cell analysis.`)[-c(1:3)]
mitf_program <- readxl::read_xlsx("data/aad0501_table_s7.xlsx")
mitf_program <- unlist(mitf_program$`Tables S7. Genes in the MITF program from single cell analysis.`)[-c(1:3)]
axl_program <- intersect(axl_program, rownames(sce_melanoma))
mitf_program <- intersect(mitf_program, rownames(sce_melanoma))
axl_scores <- apply(logcounts(sce_melanoma)[axl_program, ], 2, mean_trim_low, trim = 0.1)
mitf_scores <- apply(logcounts(sce_melanoma)[mitf_program, ], 2, mean_trim_low, trim = 0.1)
sce_melanoma_DMSO <- sce_melanoma[, sce_melanoma$Condition == "DMSO"]
sce_melanoma_DMSO <- sce_melanoma_DMSO[, !sce_melanoma_DMSO$scClassify_tumour_prediction %in% c("unassigned", "intermediate")]
sce_melanoma_DMSO
## class: SingleCellExperiment 
## dim: 30986 61514 
## metadata(1): Samples
## assays(2): counts logcounts
## rownames(30986): FAM138A OR4F5 ... AC213203.1 FAM231C
## rowData names(3): ID Symbol Type
## colnames(61514): AAACCCAAGAAACTGT-1 AAACGAAAGGGAGTTC-1 ...
##   TTTGTTGGTGGCTTGC-19 TTTGTTGTCGGTTCAA-19
## colData names(20): Sample Barcode ... Sample_publish
##   scClassify_tumour_prediction
## reducedDimNames(2): PCA UMAP
## mainExpName: NULL
## altExpNames(0):
sce_melanoma_DT <- sce_melanoma[, sce_melanoma$Condition == "DT"]
sce_melanoma_DT <- sce_melanoma_DT[, !sce_melanoma_DT$scClassify_tumour_prediction %in% c("unassigned", "intermediate")]
sce_melanoma_DT
## class: SingleCellExperiment 
## dim: 30986 48219 
## metadata(1): Samples
## assays(2): counts logcounts
## rownames(30986): FAM138A OR4F5 ... AC213203.1 FAM231C
## rowData names(3): ID Symbol Type
## colnames(48219): AAACGAAGTCCAGCGT-2 AAACGAATCCATCTCG-2 ...
##   TTTGTTGGTTGACGGA-20 TTTGTTGTCGGTGTTA-20
## colData names(20): Sample Barcode ... Sample_publish
##   scClassify_tumour_prediction
## reducedDimNames(2): PCA UMAP
## mainExpName: NULL
## altExpNames(0):

2 Signature exploration

all_marker <- append(melanoma_markers, list(AXL = axl_program, MITF = mitf_program))
library(Seurat)
seu_obj <- CreateSeuratObject(counts = counts(sce_melanoma))
seu_obj <- Seurat::AddModuleScore(seu_obj, features = all_marker)
seu_scores <- seu_obj@meta.data[, grep("Cluster", colnames(seu_obj@meta.data))]
colnames(seu_scores) <- names(all_marker)
pheatmap(cor(seu_scores))

pheatmap(cor(do.call(cbind, gene_scores)))

df_toPlot_melanoma <- moon::makeMoonDF(sce_melanoma)

ggplot(df_toPlot_melanoma, aes(x = gene_scores$`Undifferentiated-Neural crest-like`,
                               y = gene_scores$Melanocytic,
                               color = Condition)) +
  geom_scattermore(pointsize = 1) +
  facet_wrap(~Patient, scale = "free", nrow = 1) +
  scale_color_tableau() +
  theme(aspect.ratio = 1)

library(fgsea)
runFGSEA <- function(stats, pathways, scoreType = "std") {
  fgseaRes <- fgsea(pathways = pathways,
                    stats    = sort(stats),
                    minSize = 5,
                    maxSize = 10000,
                    nproc = 1,
                    scoreType = scoreType,
                    nperm = 100000)
  fgseaRes[order(fgseaRes$ES, decreasing = TRUE),]
}
.n_cells <- function(x) {
  y <- int_colData(x)$n_cells
  if (is.null(y)) return(NULL)
  if (length(metadata(x)$agg_pars$by) == 2)
    y <- as.matrix(data.frame(y, check.names = FALSE))
  return(as.table(y))
}

# limma trend
library(limma)
limma_trend <- function(exprsMat, group_id, coldata, coef = 2, covariate = NULL,  weights = NULL) {
  keep <- rowMeans(exprsMat != 0) > 0.2
  keep_sample <- colSums(exprsMat) != 0
  
  group_id <- group_id[keep_sample]
  coldata <- coldata[keep_sample, ]
  
  
  y <- methods::new("EList")
  y$E <- scater::normalizeCounts(exprsMat[keep, keep_sample])
  #y$E <- (exprsMat[keep, keep_sample])
  if (!is.null(covariate)) {
    fm1 <- as.formula(paste("~ group_id +", paste(covariate, collapse=" + ")))
    design <- stats::model.matrix(fm1, data = coldata)
  } else {
    design <- stats::model.matrix(~group_id)
  }
  
  
  if (!is.null(weights)) {
    weights <- weights[keep_sample]
    fit <- limma::lmFit(y, design = design, weights = weights)
  } else {
    fit <- limma::lmFit(y, design = design)
  }
  
  fit <- limma::eBayes(fit, trend = TRUE, robust = TRUE)
  
  
  tt <- lapply(coef, function(x) {
    res <- limma::topTable(fit, n = Inf, 
                           adjust.method = "BH", 
                           coef = x)
    res$Group <- gsub("group_id", "",  colnames(design)[x])
    res
  })
  names(tt) <-  gsub("group_id", "",  colnames(design)[coef])
  
  # tt_group2 <- limma::topTable(fit, n = Inf, adjust.method = "BH", 
  #                              coef = 3)
  # 
  return(tt)
}

library(edgeR)

# edgeR
edgeR <- function(exprsMat, group_id, coldata, covariate = NULL, coef = 2) {
  keep <- rowMeans(exprsMat != 0) > 0.2
  keep_sample <- colSums(exprsMat) != 0
  
  y <- exprsMat[keep, keep_sample]
  group_id <- group_id[keep_sample]
  coldata <- coldata[keep_sample, ]
  y <- suppressMessages(DGEList(y, 
                                group = group_id, 
                                remove.zeros = TRUE))
  
  
  if (!is.null(covariate)) {
    fm1 <- as.formula(paste("~ group_id +", paste(covariate, collapse=" + ")))
    design <- stats::model.matrix(fm1, data = coldata)
  } else {
    design <- stats::model.matrix(~group_id)
  }
  
  y <- estimateDisp(y, design = design)
  fit <- glmQLFit(y, design = design)
  qlf <- glmQLFTest(fit, coef = coef)
  
  
  tt <- lapply(coef, function(x) {
    qlf <- glmQLFTest(fit, coef = x)
    tt <- topTags(qlf, n = Inf)[[1]]
    tt$Group <- gsub("group_id", "",  colnames(design)[x])
    tt
  })
  names(tt) <-  gsub("group_id", "",  colnames(design)[coef])
  
  
  return(tt)
}


library(MAST)
library(lme4)
library(doParallel)
MAST_ind <- function(sce, covariate, ncores = 10, exprs_prop = 0.05) {
  
  library(RhpcBLASctl)
  blas_set_num_threads(1)
  
  options(mc.cores = ncores)
  keep <- rowMeans(counts(sce) != 0) > exprs_prop
  sce <- sce[keep, ]
  sca <- FromMatrix(as.matrix(logcounts(sce)),
                    colData(sce),
                    rowData(sce))
  fm1 <- as.formula(paste("~ group_id +  (1 | sample_id) +", paste(covariate, collapse=" + ")))
  # 
  # lmres <- pbmclapply(1:nrow(exprs_mat), function(x) {
  #   x <- exprs_mat[x, ]
  #   lmfit <- lmerTest::lmer(fm1)
  # }, mc.cores = 10)
  # 
  mod <- zlm(formula = fm1, sca = sca, 
             method = 'glmer', 
             ebayes = FALSE, 
             parallel = TRUE)
  #lrt_res <- lrTest(mod, "diagnosis")
  #stopCluster(cl)
  return(mod)
}

3 Using Pratilas + Nazarian scores to define responder vs non-responder

df_toPlot_melanoma$scClassify_tumour_prediction2 <- df_toPlot_melanoma$scClassify_tumour_prediction
df_toPlot_melanoma$scClassify_tumour_prediction2[df_toPlot_melanoma$scClassify_tumour_prediction2 %in% c("Melanocytic", "Transitory")] <- "Melanocytic/Transitory"
df_toPlot_melanoma$celltype_patient2 <- paste(df_toPlot_melanoma$scClassify_tumour_prediction2, df_toPlot_melanoma$Sample, sep = "|")
combine_scores <- apply(logcounts(sce_melanoma)[intersect(Pratilas_markers, Nazarian_markers), ], 2, mean_trim_low, trim = 0.1)
df_toPlot_melanoma$combine_scores <- combine_scores
ggplot(df_toPlot_melanoma[!df_toPlot_melanoma$scClassify_tumour_prediction %in% c("unassigned", "intermediate"), ], 
       aes(x = scClassify_tumour_prediction, 
           y = combine_scores, fill = Condition)) +
  geom_boxplot() +
  scale_fill_tableau()

wilcox.test(df_toPlot_melanoma$combine_scores[df_toPlot_melanoma$scClassify_tumour_prediction2 %in% "Melanocytic/Transitory"] ~ df_toPlot_melanoma$Condition[df_toPlot_melanoma$scClassify_tumour_prediction2 %in% "Melanocytic/Transitory"])
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  df_toPlot_melanoma$combine_scores[df_toPlot_melanoma$scClassify_tumour_prediction2 %in% "Melanocytic/Transitory"] by df_toPlot_melanoma$Condition[df_toPlot_melanoma$scClassify_tumour_prediction2 %in% "Melanocytic/Transitory"]
## W = 648353389, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(df_toPlot_melanoma$combine_scores[df_toPlot_melanoma$scClassify_tumour_prediction2 %in% "Neural crest-like"] ~ df_toPlot_melanoma$Condition[df_toPlot_melanoma$scClassify_tumour_prediction2 %in% "Neural crest-like"])
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  df_toPlot_melanoma$combine_scores[df_toPlot_melanoma$scClassify_tumour_prediction2 %in% "Neural crest-like"] by df_toPlot_melanoma$Condition[df_toPlot_melanoma$scClassify_tumour_prediction2 %in% "Neural crest-like"]
## W = 240986058, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(df_toPlot_melanoma$combine_scores[df_toPlot_melanoma$scClassify_tumour_prediction2 %in% "Undifferentiated"] ~ df_toPlot_melanoma$Condition[df_toPlot_melanoma$scClassify_tumour_prediction2 %in% "Undifferentiated"])
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  df_toPlot_melanoma$combine_scores[df_toPlot_melanoma$scClassify_tumour_prediction2 %in% "Undifferentiated"] by df_toPlot_melanoma$Condition[df_toPlot_melanoma$scClassify_tumour_prediction2 %in% "Undifferentiated"]
## W = 63701908, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0
t.test(df_toPlot_melanoma$combine_scores[df_toPlot_melanoma$scClassify_tumour_prediction2 %in% "Melanocytic/Transitory"] ~ df_toPlot_melanoma$Condition[df_toPlot_melanoma$scClassify_tumour_prediction2 %in% "Melanocytic/Transitory"])
## 
##  Welch Two Sample t-test
## 
## data:  df_toPlot_melanoma$combine_scores[df_toPlot_melanoma$scClassify_tumour_prediction2 %in% "Melanocytic/Transitory"] by df_toPlot_melanoma$Condition[df_toPlot_melanoma$scClassify_tumour_prediction2 %in% "Melanocytic/Transitory"]
## t = 186.74, df = 44606, p-value < 2.2e-16
## alternative hypothesis: true difference in means between group DMSO and group DT is not equal to 0
## 95 percent confidence interval:
##  0.4432088 0.4526112
## sample estimates:
## mean in group DMSO   mean in group DT 
##          0.7812921          0.3333821
t.test(df_toPlot_melanoma$combine_scores[df_toPlot_melanoma$scClassify_tumour_prediction2 %in% "Neural crest-like"] ~ df_toPlot_melanoma$Condition[df_toPlot_melanoma$scClassify_tumour_prediction2 %in% "Neural crest-like"])
## 
##  Welch Two Sample t-test
## 
## data:  df_toPlot_melanoma$combine_scores[df_toPlot_melanoma$scClassify_tumour_prediction2 %in% "Neural crest-like"] by df_toPlot_melanoma$Condition[df_toPlot_melanoma$scClassify_tumour_prediction2 %in% "Neural crest-like"]
## t = 176.5, df = 33235, p-value < 2.2e-16
## alternative hypothesis: true difference in means between group DMSO and group DT is not equal to 0
## 95 percent confidence interval:
##  0.5205107 0.5322008
## sample estimates:
## mean in group DMSO   mean in group DT 
##          0.8802473          0.3538915
t.test(df_toPlot_melanoma$combine_scores[df_toPlot_melanoma$scClassify_tumour_prediction2 %in% "Undifferentiated"] ~ df_toPlot_melanoma$Condition[df_toPlot_melanoma$scClassify_tumour_prediction2 %in% "Undifferentiated"])
## 
##  Welch Two Sample t-test
## 
## data:  df_toPlot_melanoma$combine_scores[df_toPlot_melanoma$scClassify_tumour_prediction2 %in% "Undifferentiated"] by df_toPlot_melanoma$Condition[df_toPlot_melanoma$scClassify_tumour_prediction2 %in% "Undifferentiated"]
## t = 15.376, df = 16246, p-value < 2.2e-16
## alternative hypothesis: true difference in means between group DMSO and group DT is not equal to 0
## 95 percent confidence interval:
##  0.08445261 0.10913078
## sample estimates:
## mean in group DMSO   mean in group DT 
##          0.9324362          0.8356445
ggplot(df_toPlot_melanoma[!df_toPlot_melanoma$scClassify_tumour_prediction2 %in% c("unassigned", "intermediate"), ], 
       aes(x = scClassify_tumour_prediction2, 
           y = combine_scores, fill = Condition)) +
  geom_boxplot() +
  scale_fill_tableau() +
  xlab("") +
  ylab("score") +
  labs(color = "")

ggsave(file.path(figures_dir, "boxplot_combine_nazarian_pratillas_scores.pdf"), width = 6, height = 5)
df_source <- df_toPlot_melanoma[!df_toPlot_melanoma$scClassify_tumour_prediction2 %in% c("unassigned", "intermediate"), ]
write.csv(df_source,
          file.path(sourcedata_dir, "boxplot_combine_nazarian_pratillas_scores.csv"))
aggregate(df_source$combine_scores, list(df_source$Condition, df_source$scClassify_tumour_prediction2), median)
##   Group.1                Group.2         x
## 1    DMSO Melanocytic/Transitory 0.7764467
## 2      DT Melanocytic/Transitory 0.3177287
## 3    DMSO      Neural crest-like 0.8800980
## 4      DT      Neural crest-like 0.3396001
## 5    DMSO       Undifferentiated 0.9316015
## 6      DT       Undifferentiated 0.7794290
df_toPlot_melanoma$celltype_patient <- paste(df_toPlot_melanoma$scClassify_tumour_prediction, df_toPlot_melanoma$Sample, sep = "|")

ggplot(df_toPlot_melanoma[!df_toPlot_melanoma$scClassify_tumour_prediction %in% c("unassigned", "intermediate") & df_toPlot_melanoma$celltype_patient %in% names(which(table(df_toPlot_melanoma$celltype_patient) >= 50)), ], 
       aes(x = scClassify_tumour_prediction, 
           y = combine_scores, fill = Condition)) +
  geom_boxplot() +
  scale_fill_tableau() +
  facet_wrap(~Patient, ncol = 5, nrow = 2)

ggplot(df_toPlot_melanoma[!df_toPlot_melanoma$scClassify_tumour_prediction %in% c("unassigned", "intermediate") & df_toPlot_melanoma$celltype_patient2 %in% names(which(table(df_toPlot_melanoma$celltype_patient2) >= 50)), ], 
       aes(x = scClassify_tumour_prediction2, 
           y = combine_scores, fill = Condition)) +
  geom_boxplot() +
  scale_fill_tableau() +
  facet_wrap(Sample.type~Patient_publish, ncol = 5, nrow = 2) +
  xlab("") +
  ylab("scores") +
  theme(aspect.ratio = 1, axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

ggsave(file.path(figures_dir, "boxplot_combine_nazarian_pratillas_scores_per_patient.pdf"), width = 15, height = 6)
keep_idx <- !df_toPlot_melanoma$scClassify_tumour_prediction %in% c("unassigned", "intermediate") & df_toPlot_melanoma$celltype_patient2 %in% names(which(table(df_toPlot_melanoma$celltype_patient2) >= 50))
agg_combine <- aggregate(combine_scores[keep_idx],
                         list(df_toPlot_melanoma[keep_idx, ]$scClassify_tumour_prediction2,
                              df_toPlot_melanoma[keep_idx, ]$Sample), 
                         mean)
colnames(agg_combine) <- c("celltype", "sample", "nazarian_scores")
agg_combine$condition <- ifelse(grepl("DMSO", agg_combine$sample), "DMSO", "DT")
agg_combine$patient <- gsub("_DMSO|_DT", "", agg_combine$sample)
agg_combine <- dcast(agg_combine, celltype + patient~condition, value.var = "nazarian_scores")
agg_combine$diff <- agg_combine$DMSO - agg_combine$DT
print("Difference compare across cell type")
## [1] "Difference compare across cell type"
lm_res <- lmerTest::lmer(diff~celltype + (1 | patient), data = agg_combine)
summary(lm_res)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: diff ~ celltype + (1 | patient)
##    Data: agg_combine
## 
## REML criterion at convergence: -24.9
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -1.89014 -0.56308  0.05689  0.39442  1.54573 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  patient  (Intercept) 0.011736 0.10833 
##  Residual             0.007556 0.08692 
## Number of obs: 25, groups:  patient, 10
## 
## Fixed effects:
##                            Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)                0.412051   0.048954 12.833592   8.417  1.4e-06 ***
## celltypeNeural crest-like  0.002822   0.046777  7.966246   0.060   0.9534    
## celltypeUndifferentiated  -0.124797   0.044479  8.039157  -2.806   0.0229 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) cllNc-
## clltypNcrs- -0.519       
## clltypUndff -0.562  0.571
agg_combine_quantile20 <- aggregate(combine_scores[keep_idx],
                                    list(df_toPlot_melanoma[keep_idx, ]$scClassify_tumour_prediction2,
                                         df_toPlot_melanoma[keep_idx, ]$Sample), 
                                    quantile, 0.15)
colnames(agg_combine_quantile20) <- c("celltype", "sample", "combine_scores")
agg_combine_quantile20$condition <- ifelse(grepl("DMSO", agg_combine_quantile20$sample), "DMSO", "DT")
agg_combine_quantile20$patient <- gsub("_DMSO|_DT", "", agg_combine_quantile20$sample)
agg_combine_quantile20 <- dcast(agg_combine_quantile20, celltype + patient~condition, value.var = "combine_scores")

agg_combine_quantile20$celltype_patient <- paste(agg_combine_quantile20$celltype, agg_combine_quantile20$patient, sep = "|")
df_toPlot_melanoma$responder <- NA

for (i in 1:nrow(agg_combine_quantile20)) {
  print(i)
  idx <- df_toPlot_melanoma$celltype_patient2 == paste(agg_combine_quantile20$celltype_patient[i], "DT", sep = "_") & df_toPlot_melanoma$combine_scores > agg_combine_quantile20[1, "DMSO"]
  print(sum(idx))
  df_toPlot_melanoma$responder[idx] <- "nonresponder"
}
## [1] 1
## [1] 158
## [1] 2
## [1] 130
## [1] 3
## [1] 117
## [1] 4
## [1] 23
## [1] 5
## [1] 3
## [1] 6
## [1] 265
## [1] 7
## [1] 200
## [1] 8
## [1] 145
## [1] 9
## [1] 258
## [1] 10
## [1] 2
## [1] 11
## [1] 1
## [1] 12
## [1] 6
## [1] 13
## [1] 78
## [1] 14
## [1] 8
## [1] 15
## [1] 32
## [1] 16
## [1] 57
## [1] 17
## [1] 832
## [1] 18
## [1] 204
## [1] 19
## [1] 49
## [1] 20
## [1] 68
## [1] 21
## [1] 13
## [1] 22
## [1] 22
## [1] 23
## [1] 76
## [1] 24
## [1] 2762
## [1] 25
## [1] 159
## [1] 26
## [1] 372
agg_combine_quantile10 <- aggregate(combine_scores[keep_idx],
                                    list(df_toPlot_melanoma[keep_idx, ]$scClassify_tumour_prediction2,
                                         df_toPlot_melanoma[keep_idx, ]$Sample), 
                                    quantile, 0.05)
colnames(agg_combine_quantile10) <- c("celltype", "sample", "combine_scores")
agg_combine_quantile10$condition <- ifelse(grepl("DMSO", agg_combine_quantile10$sample), "DMSO", "DT")
agg_combine_quantile10$patient <- gsub("_DMSO|_DT", "", agg_combine_quantile10$sample)
agg_combine_quantile10 <- dcast(agg_combine_quantile10, celltype + patient~condition, value.var = "combine_scores")

agg_combine_quantile10$celltype_patient <- paste(agg_combine_quantile10$celltype, agg_combine_quantile10$patient, sep = "|")

for (i in 1:nrow(agg_combine_quantile10)) {
  print(i)
  idx <- df_toPlot_melanoma$celltype_patient2 == paste(agg_combine_quantile10$celltype_patient[i], "DT", sep = "_") & df_toPlot_melanoma$combine_scores < agg_combine_quantile10[1, "DT"]
  print(sum(idx))
  df_toPlot_melanoma$responder[idx] <- "responder"
}
## [1] 1
## [1] 49
## [1] 2
## [1] 3728
## [1] 3
## [1] 1175
## [1] 4
## [1] 340
## [1] 5
## [1] 486
## [1] 6
## [1] 232
## [1] 7
## [1] 539
## [1] 8
## [1] 39
## [1] 9
## [1] 545
## [1] 10
## [1] 32
## [1] 11
## [1] 5
## [1] 12
## [1] 228
## [1] 13
## [1] 831
## [1] 14
## [1] 940
## [1] 15
## [1] 8
## [1] 16
## [1] 76
## [1] 17
## [1] 37
## [1] 18
## [1] 53
## [1] 19
## [1] 32
## [1] 20
## [1] 15
## [1] 21
## [1] 16
## [1] 22
## [1] 49
## [1] 23
## [1] 216
## [1] 24
## [1] 76
## [1] 25
## [1] 23
## [1] 26
## [1] 73
table(df_toPlot_melanoma$responder, df_toPlot_melanoma$scClassify_tumour_prediction2)
##               
##                intermediate Melanocytic/Transitory Neural crest-like unassigned
##   nonresponder            0                    896               587          0
##   responder               0                   6549              2704          0
##               
##                Undifferentiated
##   nonresponder             4557
##   responder                 590
df_toPlot_melanoma$responder[is.na(df_toPlot_melanoma$responder)] <- "others"
ggplot(df_toPlot_melanoma[df_toPlot_melanoma$Condition == "DT" &
                            !df_toPlot_melanoma$scClassify_tumour_prediction2 %in% c("unassigned", "intermediate"), ], 
       aes(x = Patient_publish, fill = responder)) +
  geom_bar(position = "fill") +
  coord_flip() +
  scale_fill_manual(values = c("#E15759","grey", "#76B7B2")) +
  facet_wrap(scClassify_tumour_prediction2~Sample.type, scale = "free", ncol = 2) +
  theme(aspect.ratio = 0.4) +
  xlab("") +
  ylab("Proportion")

ggsave(file.path(figures_dir, "barplot_resonder_proportion_per_patient.pdf"), width = 8, height = 6)
df_subset <- df_toPlot_melanoma[df_toPlot_melanoma$Condition == "DT" &
                                  !df_toPlot_melanoma$scClassify_tumour_prediction2 %in% c("unassigned", "intermediate"), ]
tab <- table(df_subset$scClassify_tumour_prediction2,
             df_subset$responder,
             df_subset$Patient_publish)
freq_celltype_prop <- lapply(1:dim(tab)[3], function(i) {
  apply(tab[ , , i], 1, function(x) x/sum(x))
})
names(freq_celltype_prop) <- dimnames(tab)[[3]]
df_freq_celltype_prop <- melt(freq_celltype_prop)
df_count <- melt(tab)
colnames(df_freq_celltype_prop) <- c("responder", "cellstate", "freq", "patient")
colnames(df_count) <- c("cellstate", "responder", "patient", "counts")
df_count <- merge(df_freq_celltype_prop, df_count)
write.csv(df_count, file.path(sourcedata_dir, "resonder_proportion_per_patient.csv"))

3.1 edgeR

sce <- sce_melanoma
sce$sample_id <- factor(paste(sce_melanoma$Sample, df_toPlot_melanoma$responder, sep = "_"))
sce$cluster_id <- factor(df_toPlot_melanoma$scClassify_tumour_prediction2)
sce$group_id <- df_toPlot_melanoma$responder
metadata(sce)$experiment_info <- unique(colData(sce)[, c("sample_id", "group_id")])

library(muscat)
pb <- aggregateData(sce,
                    assay = "counts", fun = "sum",
                    by = c("cluster_id", "sample_id"))
pb <- pb[, grep("DT_responder|DT_nonresponder", colnames(pb))]
edger_weights_res <- lapply(assayNames(pb)[c(2, 3, 5)], function(x) {
  edgeR(assay(pb, x),
        group_id = pb$group_id,
        coldata = colData(pb),
        covariate = c("Sample"),
        #weights = sample_weights[x, colnames(pb)],
        coef = 2)
})
names(edger_weights_res) <- assayNames(pb)[c(2, 3, 5)]
unlist(lapply(edger_weights_res, function(x) sum(x$responder$FDR<0.05)))
## Melanocytic/Transitory      Neural crest-like       Undifferentiated 
##                    865                    282                    744
unlist(lapply(edger_weights_res, function(x) sum((x$responder$logFC) > 1)))
## Melanocytic/Transitory      Neural crest-like       Undifferentiated 
##                    726                    631                   1101
unlist(lapply(edger_weights_res, function(x) sum((x$responder$logFC) < -2)))
## Melanocytic/Transitory      Neural crest-like       Undifferentiated 
##                    382                    172                    826
hallmark_fgsea_list <- lapply(edger_weights_res, function(x) {
  x <- x$responder
  logfc <- x$logFC
  names(logfc) <- rownames(x)
  logfc <- na.omit(logfc)
  hallmark_fgsea <- runFGSEA(logfc, hallmarkList)
  hallmark_fgsea <- data.frame(hallmark_fgsea)
})
hallmark_fgsea_list <- lapply(hallmark_fgsea_list, function(x) {
  rownames(x) <- x[, 1]
  x
})
df_nes <- do.call(cbind, lapply(hallmark_fgsea_list, function(x) x[names(hallmarkList), "NES"]))
rownames(df_nes) <- names(hallmarkList)
colnames(df_nes) <- names(edger_weights_res)
df_pval <- do.call(cbind, lapply(hallmark_fgsea_list, function(x) x[names(hallmarkList), "pval"]))
rownames(df_pval) <- names(hallmarkList)
colnames(df_pval) <- names(edger_weights_res)

df_nes <- melt(df_nes)
df_pval <- melt(df_pval)
df_nes <- merge(df_nes, df_pval, by = c("Var1", "Var2"))

colnames(df_nes) <- c("pathway", "celltype", "NES", "pval")
df_nes_agg <- df_nes %>%
  group_by(pathway) %>%
  summarise(NES = mean(NES))
keep_pathways <- c(as.character(df_nes_agg[which(df_nes_agg$NES >= sort(df_nes_agg$NES, decreasing = TRUE)[5]),]$pathway),
                   as.character(df_nes_agg[which(df_nes_agg$NES <= sort(df_nes_agg$NES)[10]),]$pathway))
keep_pathways <- unique(keep_pathways)
saveRDS(keep_pathways, file = "results/selected_pathways_inhouse_figure4c.rds")
library(scales)
ggplot(df_nes, aes(x = celltype, 
                   y = gsub("_", " ", gsub("HALLMARK_", "", pathway)), 
                   color = NES, size = -log10(pval))) +
  geom_point() +
  scale_color_gradientn(colors = rdbu, limits = c(-2.9, 1.5), 
                        values = rescale(c(-2.9, 0, 1.5))) +
  labs(title = "") +
  theme(aspect.ratio = 3.5, axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
  xlab("") +
  ylab("") 

ggsave(file.path(figures_dir, "dotplot_edgeR_nazarian_combined_scores.pdf"), width = 6, height = 12)
write.csv(df_nes,
          file.path(sourcedata_dir, "dotplot_edgeR_nazarian_combined_scores.csv"))


df_nes_subset <- df_nes[df_nes$pathway %in% keep_pathways, ]
df_nes_subset$pathway <- factor(df_nes_subset$pathway, levels = as.character(df_nes_agg$pathway[order(df_nes_agg$NES)]))
df_nes_subset$pathway <- droplevels(df_nes_subset$pathway)
library(scales)
ggplot(df_nes_subset, aes(x = celltype, 
                          y = gsub("_", " ", gsub("HALLMARK_", "", pathway)), 
                          color = NES, size = -log10(pval))) +
  geom_point() +
  scale_color_gradientn(colors = rdbu, limits = c(-2.9, 1.5), 
                        values = rescale(c(-2.9, 0, 1.5))) +
  labs(title = "") +
  theme(aspect.ratio = 3.5, axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
  xlab("") +
  ylab("") 

ggsave(file.path(figures_dir, "dotplot_edgeR_nazarian_combined_scores_selected.pdf"), width = 6, height = 6)
write.csv(df_nes_subset,
          file.path(sourcedata_dir, "dotplot_edgeR_nazarian_combined_scores_selected.csv"))

4 Hallmark

hallmark_scores <- lapply(hallmarkList, function(x) {
  apply(logcounts(sce_melanoma)[intersect(x, rownames(sce_melanoma)), ], 2, mean_trim_low, trim = 0.1)
})
df_melanoma_subset <- df_toPlot_melanoma[!df_toPlot_melanoma$scClassify_tumour_prediction %in% c("unassigned", "intermediate"), ]
df_melanoma_subset$HALLMARK_IL6_JAK_STAT3_SIGNALING <- hallmark_scores$HALLMARK_IL6_JAK_STAT3_SIGNALING[rownames(df_melanoma_subset)]
g1 <- ggplot(df_melanoma_subset, 
             aes(x = scClassify_tumour_prediction2, 
                 y = HALLMARK_IL6_JAK_STAT3_SIGNALING, fill = responder)) +
  geom_boxplot() +
  xlab("") +
  scale_fill_manual(values = c("#E15759","grey", "#76B7B2")) +
  ylab("IL6 JAK STAT3 SIGNALING") +
  theme(aspect.ratio = 1)


df_melanoma_subset <- df_toPlot_melanoma[!df_toPlot_melanoma$scClassify_tumour_prediction %in% c("unassigned", "intermediate"), ]
df_melanoma_subset$HALLMARK_TNFA_SIGNALING_VIA_NFKB <- hallmark_scores$HALLMARK_TNFA_SIGNALING_VIA_NFKB[rownames(df_melanoma_subset)]
g2 <- ggplot(df_melanoma_subset, 
             aes(x = scClassify_tumour_prediction2, 
                 y = HALLMARK_TNFA_SIGNALING_VIA_NFKB, fill = responder)) +
  geom_boxplot() +
  xlab("") +
  scale_fill_manual(values = c("#E15759","grey", "#76B7B2")) +
  ylab("TNFA SIGNALING VIA NFKB") +
  theme(aspect.ratio = 1)

ggarrange(g1, g2, ncol = 2, nrow = 1, align = "hv", common.legend = TRUE)

ggsave(file.path(figures_dir, "boxplot_IL6_TNF_scores.pdf"), width = 10, height = 6)
write.csv(df_melanoma_subset, file.path(sourcedata_dir, "boxplot_IL6_TNF_scores.csv"))
df_melanoma_subset <- df_toPlot_melanoma[!df_toPlot_melanoma$scClassify_tumour_prediction %in% c("unassigned", "intermediate"), ]
g <- lapply(names(hallmark_scores), function(i) {
  
  df_melanoma_subset[, i] <- hallmark_scores[[i]][rownames(df_melanoma_subset)]
  ggplot(df_melanoma_subset, 
         aes_string(x = "scClassify_tumour_prediction2", 
                    y = i, fill = "responder")) +
    geom_boxplot() +
    xlab("") +
    scale_fill_manual(values = c("#E15759","grey", "#76B7B2")) +
    ylab(i) +
    theme(aspect.ratio = 1)
  
})

ggarrange(plotlist = g, ncol = 9, nrow = 6, align = "hv", common.legend = TRUE)

ggsave(file.path(figures_dir, "boxplot_all_hallmark_scores.pdf"), width = 50, height = 30, limitsize = FALSE)
df_melanoma_subset <- cbind(df_melanoma_subset, do.call(cbind, hallmark_scores)[rownames(df_melanoma_subset), ])
write.csv(df_melanoma_subset, file.path(sourcedata_dir, "boxplot_all_hallmark_scores.csv"))